import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
# The first command-line argument is a directory path: it is added to the
# module search path here and is also where credit_customers.csv is read from.
# Running the script without that argument raises IndexError on this line.
sys.path.append(sys.argv[1])

import pandas as pd   
import pickle

   
# Read the customer table from the directory passed as the first CLI argument.
credit_customers = pd.read_csv(os.path.join(sys.argv[1], 'credit_customers.csv'))

# Summary statistics for the 'age' column. The column is pulled out once and
# every statistic is derived from that single Series.
age_series = credit_customers['age']
age_mean = age_series.mean()
age_median = age_series.median()
# mode() returns all most-frequent values in ascending order; take the first.
age_mode = age_series.mode()[0]
age_range = age_series.max() - age_series.min()

print(f"Age - Mean: {age_mean}, Median: {age_median}, Mode: {age_mode}, Range: {age_range}")

# Reference-result dumps, currently disabled:
# pickle.dump(age_mean, open("./ref_result/age_mean.pkl","wb"))
# pickle.dump(age_median, open("./ref_result/age_median.pkl","wb"))
# pickle.dump(age_mode, open("./ref_result/age_mode.pkl","wb"))
# pickle.dump(age_range, open("./ref_result/age_range.pkl","wb"))

  

import pandas as pd   
import matplotlib.pyplot as plt   
import seaborn as sns

# Histogram of customer age (20 bins) with a KDE overlay, written to
# ref_result/histplot.png. Uses the already-loaded credit_customers frame.

# savefig does not create missing parent directories, so make sure the output
# directory exists instead of failing with FileNotFoundError on a fresh run.
os.makedirs('ref_result', exist_ok=True)

plt.figure(figsize=(10, 6))
sns.histplot(credit_customers['age'], kde=True, bins=20)
plt.title("Age Distribution")
plt.xlabel("Age")
plt.ylabel("Frequency")
plt.savefig('ref_result/histplot.png')
# For interactive inspection, call plt.show() here, before the figure is closed.
# Close the figure once it is on disk; pyplot otherwise keeps every figure it
# creates alive for the lifetime of the process.
plt.close()


import pandas as pd   
import pickle

# Frequency table of the 'employment' column, most common category first.
employment_counts = credit_customers['employment'].value_counts()

# Header (preceded by a blank line) followed by the table on the next line.
print("\nEmployment Status Distribution:", employment_counts, sep="\n")

# Reference-result dump, currently disabled:
# pickle.dump(employment_counts,open("./ref_result/employment_counts.pkl","wb"))

import pandas as pd  
import matplotlib.pyplot as plt  
import seaborn as sns  
   
# Bar chart of employment-status frequencies, bars ordered from most to least
# common, written to ref_result/countplot.png. Uses the already-loaded
# credit_customers frame.

# savefig does not create missing parent directories, so make sure the output
# directory exists instead of failing with FileNotFoundError on a fresh run.
os.makedirs('ref_result', exist_ok=True)

# value_counts() sorts descending, so its index gives the bar order.
employment_order = credit_customers['employment'].value_counts().index

plt.figure(figsize=(10, 6))
sns.countplot(x='employment', data=credit_customers, order=employment_order)
plt.title("Employment Status Distribution")
plt.xlabel("Employment Status")
plt.ylabel("Frequency")
plt.savefig('ref_result/countplot.png')
# For interactive inspection, call plt.show() here, before the figure is closed.
# Close the figure once it is on disk; pyplot otherwise keeps every figure it
# creates alive for the lifetime of the process.
plt.close()


import pandas as pd   
import pickle

# Frequency table of the 'credit_history' column, most common category first.
credit_history_counts = credit_customers['credit_history'].value_counts()

# Header (preceded by a blank line) followed by the table on the next line.
print("\nCredit History Distribution:", credit_history_counts, sep="\n")

# Reference-result dump, currently disabled:
# pickle.dump(credit_history_counts,open("./ref_result/credit_history_counts.pkl","wb"))

import pandas as pd   
import matplotlib.pyplot as plt  
import seaborn as sns  

   
# Bar chart of credit-history frequencies, bars ordered from most to least
# common, written to ref_result/countplot_2.png. Uses the already-loaded
# credit_customers frame.

# savefig does not create missing parent directories, so make sure the output
# directory exists instead of failing with FileNotFoundError on a fresh run.
os.makedirs('ref_result', exist_ok=True)

# value_counts() sorts descending, so its index gives the bar order.
credit_history_order = credit_customers['credit_history'].value_counts().index

plt.figure(figsize=(10, 6))
sns.countplot(x='credit_history', data=credit_customers, order=credit_history_order)
plt.title("Credit History Distribution")
plt.xlabel("Credit History")
plt.ylabel("Frequency")
# Tilt the category labels so they do not overlap.
plt.xticks(rotation=45)
plt.savefig('ref_result/countplot_2.png')
# For interactive inspection, call plt.show() here, before the figure is closed.
# Close the figure once it is on disk; pyplot otherwise keeps every figure it
# creates alive for the lifetime of the process.
plt.close()


